📊 Discretizing Continuous Data for the Dynamic Bayesian Networks¶
Import useful libraries
In [ ]:
import os
import glob
import numpy as np
import pandas as pd
import logging
logging.getLogger().setLevel(logging.CRITICAL)
logging.getLogger("matplotlib").setLevel(logging.WARNING)
import warnings
warnings.filterwarnings('ignore')
from utilities import DataVisualizer, DataDiscretizer, DataProcessor, DataEncoder
Define basic folder paths
In [2]:
# Define folder names
DATA_FOLDER_NAME = r".\data"
ORIGINAL_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "datasets-imola")
DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "discretized-datasets-imola")
AGGREGATED_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "aggregated-datasets-imola")
DISCRETIZED_AGGREGATED_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "discretized-aggregated-datasets-imola")
🔔 Analyzing data distributions¶
In [3]:
# Get all CSV dataset files from the specified folder
datasets_imola = glob.glob(os.path.join(ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, "*.csv"))
print(f"📂 Found {len(datasets_imola)} datasets in '{ORIGINAL_DATASETS_IMOLA_FOLDER_NAME}'")
📂 Found 3 datasets in '.\data\datasets-imola'
🔷 Analyzing 20241128-imola dataset¶
In [4]:
# Select the dataset file to analyze
dataset_path = next((file for file in datasets_imola if '20241128-imola.csv' in file), None)
if dataset_path:
print(f"📂 Loading dataset: {dataset_path}")
# Load the dataset into a DataFrame
imola_20241128_df = pd.read_csv(dataset_path)
# Display basic dataset information
print("\n🔍 Dataset Overview:")
print(imola_20241128_df.info())
print("\n📊 First 5 Rows:")
display(imola_20241128_df.head())
print("\n📈 Summary Statistics:")
display(imola_20241128_df.describe())
# Get distribution overview with resulting bins
bins_result = DataVisualizer.plot_distributions_overview(
df=imola_20241128_df,
columns=imola_20241128_df.columns.difference(['InverterFault']).tolist(),
bins='auto',
quantiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
)
# Print bin suggestions in a formatted way
print("\n📌 Bin Distributions info:")
for column, stats in bins_result.items():
print(f"\n🔹 {column}:")
for key, value in stats.items():
print(f" - {key}: {value}")
else:
print("⚠ Dataset '20241128-imola.csv' not found in the specified folder.")
📂 Loading dataset: .\data\datasets-imola\20241128-imola.csv 🔍 Dataset Overview: <class 'pandas.core.frame.DataFrame'> RangeIndex: 248448 entries, 0 to 248447 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 BatteryVoltage_V 248448 non-null float64 1 BatteryCurrent_A 248448 non-null float64 2 BatteryPackTemp_C 248448 non-null float64 3 InverterFault 248448 non-null int64 4 InverterSpeed_RearLeft_RPM 248448 non-null float64 5 Inverter_Iq_Ref_RearLeft_A 248448 non-null float64 6 Inverter_Id_Ref_RearLeft_A 248448 non-null float64 7 MotorTemp_RearLeft_C 248448 non-null float64 8 InverterTemp_RearLeft_C 248448 non-null float64 9 InverterSpeed_RearRight_RPM 248448 non-null float64 10 Inverter_Iq_Ref_RearRight_A 248448 non-null float64 11 Inverter_Id_Ref_RearRight_A 248448 non-null float64 12 MotorTemp_RearRight_C 248448 non-null float64 13 InverterTemp_RearRight_C 248448 non-null float64 dtypes: float64(13), int64(1) memory usage: 26.5 MB None 📊 First 5 Rows:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 504.26001 | 0.00 | 13.9 | 0 | 0.0 | 0.0 | 0.0 | 13.0 | 15.0 | 0.0 | 0.0 | 0.0 | 14.0 | 15.0 |
| 1 | 504.26001 | 0.00 | 13.9 | 0 | 0.0 | 0.0 | 0.0 | 13.0 | 15.0 | 0.0 | 0.0 | 0.0 | 14.0 | 15.0 |
| 2 | 504.26001 | 0.00 | 13.9 | 0 | 0.0 | 0.0 | 0.0 | 13.0 | 15.0 | 0.0 | 0.0 | 0.0 | 14.0 | 15.0 |
| 3 | 504.26001 | -0.03 | 13.9 | 0 | 0.0 | 0.0 | 0.0 | 13.0 | 15.0 | 0.0 | 0.0 | 0.0 | 14.0 | 15.0 |
| 4 | 504.26001 | -0.03 | 13.9 | 0 | 0.0 | 0.0 | 0.0 | 13.0 | 15.0 | 0.0 | 0.0 | 0.0 | 14.0 | 15.0 |
📈 Summary Statistics:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 248448.000000 | 248448.000000 | 248448.000000 | 248448.0 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 | 248448.000000 |
| mean | 470.132810 | 9.072812 | 22.137357 | 0.0 | 2691.249139 | 6.832681 | -1.531501 | 40.275965 | 29.517537 | 2656.734955 | -6.073359 | -1.456921 | 43.435157 | 29.595223 |
| std | 18.943940 | 19.741638 | 5.606731 | 0.0 | 3324.347981 | 12.631832 | 3.431754 | 14.733796 | 5.621444 | 3253.950091 | 11.886625 | 3.229787 | 16.021885 | 5.499356 |
| min | 413.500000 | -36.689999 | 13.750000 | 0.0 | -76.000000 | -26.750000 | -49.812500 | 12.000000 | 15.000000 | -11.000000 | -50.500000 | -45.375000 | 13.000000 | 15.000000 |
| 25% | 453.799988 | 0.060000 | 16.750000 | 0.0 | 0.000000 | 0.000000 | -2.062500 | 32.000000 | 27.000000 | 0.000000 | -10.812500 | -2.000000 | 34.000000 | 27.000000 |
| 50% | 468.540009 | 0.090000 | 21.959999 | 0.0 | 3.000000 | 0.000000 | 0.000000 | 39.000000 | 30.000000 | 0.000000 | 0.000000 | 0.000000 | 42.000000 | 30.000000 |
| 75% | 485.420013 | 10.660000 | 27.820000 | 0.0 | 6012.000000 | 11.562500 | 0.000000 | 49.000000 | 33.000000 | 5861.000000 | 0.000000 | 0.000000 | 54.000000 | 33.000000 |
| max | 510.779999 | 115.239998 | 29.680000 | 0.0 | 14881.000000 | 52.000000 | 0.000000 | 76.000000 | 45.000000 | 12655.000000 | 32.875000 | 0.000000 | 80.000000 | 44.000000 |
📌 Bin Distributions info: 🔹 BatteryCurrent_A: - num_bins: 6 - quantile_edges: [-36.68999862670898, -0.0900000035762786, 0.0599999986588954, 0.0900000035762786, 10.65999984741211, 34.4900016784668, 115.23999786376952] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 BatteryPackTemp_C: - num_bins: 6 - quantile_edges: [13.75, 13.899999618530272, 16.75, 21.959999084472656, 27.81999969482422, 29.25, 29.68000030517578] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 BatteryVoltage_V: - num_bins: 6 - quantile_edges: [413.5, 449.260009765625, 453.7999877929688, 468.5400085449219, 485.4200134277344, 495.6400146484375, 510.7799987792969] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterSpeed_RearLeft_RPM: - num_bins: 6 - quantile_edges: [-76.0, -2.0, 0.0, 3.0, 6012.0, 7515.0, 14881.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterSpeed_RearRight_RPM: - num_bins: 6 - quantile_edges: [-11.0, 0.0, 0.0, 0.0, 5861.0, 7448.0, 12655.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterTemp_RearLeft_C: - num_bins: 6 - quantile_edges: [15.0, 24.0, 27.0, 30.0, 33.0, 35.0, 45.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterTemp_RearRight_C: - num_bins: 6 - quantile_edges: [15.0, 24.0, 27.0, 30.0, 33.0, 35.0, 44.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Id_Ref_RearLeft_A: - num_bins: 6 - quantile_edges: [-49.8125, -4.6875, -2.0625, 0.0, 0.0, 0.0, 0.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Id_Ref_RearRight_A: - num_bins: 6 - quantile_edges: [-45.375, -4.3125, -2.0, 0.0, 0.0, 0.0, 0.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Iq_Ref_RearLeft_A: - num_bins: 6 - quantile_edges: [-26.75, 0.0, 0.0, 0.0, 11.5625, 26.75, 52.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Iq_Ref_RearRight_A: - num_bins: 6 - quantile_edges: [-50.5, -24.75, -10.8125, 0.0, 0.0, 0.125, 32.875] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 MotorTemp_RearLeft_C: - num_bins: 6 - quantile_edges: [12.0, 16.0, 32.0, 39.0, 49.0, 61.0, 76.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 MotorTemp_RearRight_C: - num_bins: 6 - quantile_edges: [13.0, 17.0, 34.0, 42.0, 54.0, 65.0, 80.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
🔷 Analyzing 20250113-imola dataset¶
In [5]:
# Select the dataset file to analyze
dataset_path = next((file for file in datasets_imola if '20250113-imola.csv' in file), None)
if dataset_path:
print(f"📂 Loading dataset: {dataset_path}")
# Load the dataset into a DataFrame
imola_20250113_df = pd.read_csv(dataset_path)
# Display basic dataset information
print("\n🔍 Dataset Overview:")
print(imola_20250113_df.info())
print("\n📊 First 5 Rows:")
display(imola_20250113_df.head())
print("\n📈 Summary Statistics:")
display(imola_20250113_df.describe())
# Get distribution overview with resulting bins
bins_result = DataVisualizer.plot_distributions_overview(
df=imola_20250113_df,
columns=imola_20250113_df.columns.difference(['InverterFault']).tolist(),
bins='auto',
quantiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
)
# Print bin suggestions in a formatted way
print("\n📌 Bin Distributions info:")
for column, stats in bins_result.items():
print(f"\n🔹 {column}:")
for key, value in stats.items():
print(f" - {key}: {value}")
else:
print("⚠ Dataset '20250113-imola.csv' not found in the specified folder.")
📂 Loading dataset: .\data\datasets-imola\20250113-imola.csv 🔍 Dataset Overview: <class 'pandas.core.frame.DataFrame'> RangeIndex: 497553 entries, 0 to 497552 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 BatteryVoltage_V 497553 non-null float64 1 BatteryCurrent_A 497553 non-null float64 2 BatteryPackTemp_C 497553 non-null float64 3 InverterFault 497553 non-null int64 4 InverterSpeed_RearLeft_RPM 497553 non-null float64 5 Inverter_Iq_Ref_RearLeft_A 497553 non-null float64 6 Inverter_Id_Ref_RearLeft_A 497553 non-null float64 7 MotorTemp_RearLeft_C 497553 non-null float64 8 InverterTemp_RearLeft_C 497553 non-null float64 9 InverterSpeed_RearRight_RPM 497553 non-null float64 10 Inverter_Iq_Ref_RearRight_A 497553 non-null float64 11 Inverter_Id_Ref_RearRight_A 497553 non-null float64 12 MotorTemp_RearRight_C 497553 non-null float64 13 InverterTemp_RearRight_C 497553 non-null float64 dtypes: float64(13), int64(1) memory usage: 53.1 MB None 📊 First 5 Rows:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 502.880005 | 0.03 | 5.19 | 0 | 0.0 | 0.0 | 0.0 | 8.0 | 11.0 | 31.0 | 0.0 | 0.0 | 10.0 | 13.0 |
| 1 | 502.899994 | 0.06 | 5.19 | 0 | 0.0 | 0.0 | 0.0 | 8.0 | 11.0 | 31.0 | 0.0 | 0.0 | 10.0 | 13.0 |
| 2 | 502.899994 | 0.06 | 5.19 | 0 | 3.0 | 0.0 | 0.0 | 8.0 | 11.0 | 29.0 | 0.0 | 0.0 | 10.0 | 13.0 |
| 3 | 502.899994 | 0.06 | 5.19 | 0 | 3.0 | 0.0 | 0.0 | 8.0 | 11.0 | 29.0 | 0.0 | 0.0 | 10.0 | 13.0 |
| 4 | 502.899994 | 0.06 | 5.19 | 0 | 3.0 | 0.0 | 0.0 | 8.0 | 11.0 | 29.0 | 0.0 | 0.0 | 10.0 | 13.0 |
📈 Summary Statistics:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 | 497553.000000 |
| mean | 463.542110 | 6.043057 | 15.061768 | 0.116209 | 2167.661799 | 6.023563 | -1.143004 | 33.947387 | 25.935744 | 2133.151913 | -5.894690 | -1.118844 | 37.482093 | 26.616244 |
| std | 20.394413 | 14.703243 | 4.844621 | 0.320475 | 2369.976221 | 11.305793 | 2.497655 | 9.819891 | 5.290186 | 2304.850672 | 11.040044 | 2.452635 | 11.906781 | 5.114772 |
| min | 385.820007 | -13.500000 | 5.190000 | 0.000000 | -1439.000000 | -7.062500 | -52.937500 | 7.000000 | 0.000000 | -573.000000 | -50.500000 | -45.937500 | 9.000000 | 0.000000 |
| 25% | 448.380005 | 0.000000 | 11.490000 | 0.000000 | 0.000000 | 0.000000 | -1.312500 | 29.000000 | 24.000000 | 0.000000 | -7.562500 | -1.312500 | 30.000000 | 24.000000 |
| 50% | 463.579987 | 0.090000 | 15.730000 | 0.000000 | 1748.000000 | 0.000000 | 0.000000 | 33.000000 | 25.000000 | 1740.000000 | 0.000000 | 0.000000 | 36.000000 | 26.000000 |
| 75% | 478.880005 | 4.360000 | 19.270000 | 0.000000 | 4075.000000 | 7.750000 | 0.000000 | 42.000000 | 28.000000 | 4023.000000 | 0.000000 | 0.000000 | 47.000000 | 29.000000 |
| max | 503.100006 | 115.790001 | 23.639999 | 1.000000 | 14967.000000 | 52.000000 | 0.000000 | 57.000000 | 45.000000 | 10113.000000 | 6.875000 | 0.000000 | 67.000000 | 46.000000 |
📌 Bin Distributions info: 🔹 BatteryCurrent_A: - num_bins: 6 - quantile_edges: [-13.5, -0.3899999856948852, 0.0, 0.0900000035762786, 4.360000133514404, 20.559999465942383, 115.79000091552734] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 BatteryPackTemp_C: - num_bins: 6 - quantile_edges: [5.190000057220459, 7.090000152587891, 11.489999771118164, 15.729999542236328, 19.270000457763672, 20.68000030517578, 23.63999938964844] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 BatteryVoltage_V: - num_bins: 6 - quantile_edges: [385.8200073242188, 435.2200012207031, 448.3800048828125, 463.5799865722656, 478.8800048828125, 488.8599853515625, 503.1000061035156] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterSpeed_RearLeft_RPM: - num_bins: 6 - quantile_edges: [-1439.0, -2.0, 0.0, 1748.0, 4075.0, 5399.0, 14967.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterSpeed_RearRight_RPM: - num_bins: 6 - quantile_edges: [-573.0, 0.0, 0.0, 1740.0, 4023.0, 5301.0, 10113.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterTemp_RearLeft_C: - num_bins: 6 - quantile_edges: [0.0, 21.0, 24.0, 25.0, 28.0, 33.0, 45.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterTemp_RearRight_C: - num_bins: 6 - quantile_edges: [0.0, 22.0, 24.0, 26.0, 29.0, 34.0, 46.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Id_Ref_RearLeft_A: - num_bins: 6 - quantile_edges: [-52.9375, -4.0, -1.3125, 0.0, 0.0, 0.0, 0.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Id_Ref_RearRight_A: - num_bins: 6 - quantile_edges: [-45.9375, -3.9375, -1.3125, 0.0, 0.0, 0.0, 0.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Iq_Ref_RearLeft_A: - num_bins: 6 - quantile_edges: [-7.0625, -0.125, 0.0, 0.0, 7.75, 22.8125, 52.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Iq_Ref_RearRight_A: - num_bins: 6 - quantile_edges: [-50.5, -22.25, -7.5625, 0.0, 0.0, 0.125, 6.875] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 MotorTemp_RearLeft_C: - num_bins: 6 - quantile_edges: [7.0, 25.0, 29.0, 33.0, 42.0, 46.0, 57.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 MotorTemp_RearRight_C: - num_bins: 6 - quantile_edges: [9.0, 25.0, 30.0, 36.0, 47.0, 53.0, 67.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
🔷 Analyzing 20250114-imola dataset¶
In [6]:
# Select the dataset file to analyze
dataset_path = next((file for file in datasets_imola if '20250114-imola.csv' in file), None)
if dataset_path:
print(f"📂 Loading dataset: {dataset_path}")
# Load the dataset into a DataFrame
imola_20250114_df = pd.read_csv(dataset_path)
# Display basic dataset information
print("\n🔍 Dataset Overview:")
print(imola_20250114_df.info())
print("\n📊 First 5 Rows:")
display(imola_20250114_df.head())
print("\n📈 Summary Statistics:")
display(imola_20250114_df.describe())
# Get distribution overview with resulting bins
bins_result = DataVisualizer.plot_distributions_overview(
df=imola_20250114_df,
columns=imola_20250114_df.columns.difference(['InverterFault']).tolist(),
bins='auto',
quantiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
)
# Print bin suggestions in a formatted way
print("\n📌 Bin Distributions info:")
for column, stats in bins_result.items():
print(f"\n🔹 {column}:")
for key, value in stats.items():
print(f" - {key}: {value}")
else:
print("⚠ Dataset '20250114-imola.csv' not found in the specified folder.")
📂 Loading dataset: .\data\datasets-imola\20250114-imola.csv 🔍 Dataset Overview: <class 'pandas.core.frame.DataFrame'> RangeIndex: 603724 entries, 0 to 603723 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 BatteryVoltage_V 603724 non-null float64 1 BatteryCurrent_A 603724 non-null float64 2 BatteryPackTemp_C 603724 non-null float64 3 InverterFault 603724 non-null int64 4 InverterSpeed_RearLeft_RPM 603724 non-null float64 5 Inverter_Iq_Ref_RearLeft_A 603724 non-null float64 6 Inverter_Id_Ref_RearLeft_A 603724 non-null float64 7 MotorTemp_RearLeft_C 603724 non-null float64 8 InverterTemp_RearLeft_C 603724 non-null float64 9 InverterSpeed_RearRight_RPM 603724 non-null float64 10 Inverter_Iq_Ref_RearRight_A 603724 non-null float64 11 Inverter_Id_Ref_RearRight_A 603724 non-null float64 12 MotorTemp_RearRight_C 603724 non-null float64 13 InverterTemp_RearRight_C 603724 non-null float64 dtypes: float64(13), int64(1) memory usage: 64.5 MB None 📊 First 5 Rows:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 500.220001 | 0.03 | 17.120001 | 0 | 0.0 | 0.0 | 0.0 | 9.0 | 10.0 | 0.0 | 0.0 | 0.0 | 10.0 | 10.0 |
| 1 | 500.220001 | 0.03 | 17.120001 | 0 | 0.0 | 0.0 | 0.0 | 9.0 | 10.0 | 0.0 | 0.0 | 0.0 | 10.0 | 10.0 |
| 2 | 500.220001 | 0.03 | 17.120001 | 0 | -2.0 | 0.0 | 0.0 | 9.0 | 10.0 | 0.0 | 0.0 | 0.0 | 10.0 | 10.0 |
| 3 | 500.220001 | 0.03 | 17.120001 | 0 | -2.0 | 0.0 | 0.0 | 9.0 | 10.0 | 0.0 | 0.0 | 0.0 | 10.0 | 10.0 |
| 4 | 500.220001 | 0.03 | 17.120001 | 0 | -2.0 | 0.0 | 0.0 | 9.0 | 10.0 | 0.0 | 0.0 | 0.0 | 10.0 | 10.0 |
📈 Summary Statistics:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 | 603724.000000 |
| mean | 475.379427 | 5.326524 | 26.169941 | 0.125693 | 1833.484712 | 5.054590 | -0.956816 | 31.860244 | 26.206788 | 1802.161027 | -4.978979 | -0.954104 | 35.229350 | 26.970097 |
| std | 16.640589 | 14.433446 | 5.712431 | 0.331504 | 2463.637554 | 11.118464 | 2.350098 | 11.030861 | 5.909046 | 2405.234361 | 10.923923 | 2.402517 | 12.961616 | 6.125237 |
| min | 405.399994 | -14.930000 | 15.100000 | 0.000000 | -326.000000 | -7.625000 | -52.937500 | 4.000000 | 0.000000 | -239.000000 | -50.375000 | -46.125000 | 4.000000 | 0.000000 |
| 25% | 460.880005 | 0.030000 | 18.950001 | 0.000000 | 0.000000 | 0.000000 | -0.437500 | 27.000000 | 23.000000 | 0.000000 | -2.062500 | -0.437500 | 29.000000 | 24.000000 |
| 50% | 478.940002 | 0.060000 | 28.540001 | 0.000000 | 3.000000 | 0.000000 | 0.000000 | 31.000000 | 26.000000 | 0.000000 | 0.000000 | 0.000000 | 34.000000 | 27.000000 |
| 75% | 488.459991 | 0.700000 | 30.730000 | 0.000000 | 3873.000000 | 1.875000 | 0.000000 | 38.000000 | 30.000000 | 3841.000000 | 0.000000 | 0.000000 | 42.000000 | 31.000000 |
| max | 503.260010 | 116.339996 | 33.150002 | 1.000000 | 15312.000000 | 51.875000 | 0.000000 | 66.000000 | 45.000000 | 11698.000000 | 7.375000 | 0.000000 | 77.000000 | 46.000000 |
📌 Bin Distributions info: 🔹 BatteryCurrent_A: - num_bins: 6 - quantile_edges: [-14.93000030517578, -0.3300000131130218, 0.0299999993294477, 0.0599999986588954, 0.699999988079071, 20.040000915527344, 116.33999633789062] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 BatteryPackTemp_C: - num_bins: 6 - quantile_edges: [15.100000381469728, 16.969999313354492, 18.950000762939453, 28.540000915527344, 30.729999542236328, 31.200000762939453, 33.150001525878906] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 BatteryVoltage_V: - num_bins: 6 - quantile_edges: [405.3999938964844, 454.8999938964844, 460.8800048828125, 478.9400024414063, 488.4599914550781, 494.3599853515625, 503.260009765625] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterSpeed_RearLeft_RPM: - num_bins: 6 - quantile_edges: [-326.0, -2.0, 0.0, 3.0, 3873.0, 5571.0, 15312.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterSpeed_RearRight_RPM: - num_bins: 6 - quantile_edges: [-239.0, 0.0, 0.0, 0.0, 3841.0, 5446.0, 11698.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterTemp_RearLeft_C: - num_bins: 6 - quantile_edges: [0.0, 20.0, 23.0, 26.0, 30.0, 34.0, 45.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 InverterTemp_RearRight_C: - num_bins: 6 - quantile_edges: [0.0, 21.0, 24.0, 27.0, 31.0, 35.0, 46.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Id_Ref_RearLeft_A: - num_bins: 6 - quantile_edges: [-52.9375, -3.875, -0.4375, 0.0, 0.0, 0.0, 0.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Id_Ref_RearRight_A: - num_bins: 6 - quantile_edges: [-46.125, -3.8125, -0.4375, 0.0, 0.0, 0.0, 0.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Iq_Ref_RearLeft_A: - num_bins: 6 - quantile_edges: [-7.625, 0.0, 0.0, 0.0, 1.875, 22.25, 51.875] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 Inverter_Iq_Ref_RearRight_A: - num_bins: 6 - quantile_edges: [-50.375, -21.875, -2.0625, 0.0, 0.0, 0.0, 7.375] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 MotorTemp_RearLeft_C: - num_bins: 6 - quantile_edges: [4.0, 19.0, 27.0, 31.0, 38.0, 45.0, 66.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] 🔹 MotorTemp_RearRight_C: - num_bins: 6 - quantile_edges: [4.0, 19.0, 29.0, 34.0, 42.0, 51.0, 77.0] - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
🪓 Manual discretization with cut method¶
Define the bins intervals and labels for each feature
In [7]:
# Configure different discretization for each column
discretizion_columns_configs = {
'BatteryCurrent_A': {
'bins': [-np.inf, 0.09, 20., np.inf],
'labels': ['0_Idle', '1_Normal', '2_High']
},
'BatteryPackTemp_C': {
'bins': [-np.inf, 16., 28., np.inf],
'labels': ['0_Low', '1_Normal', '2_High']
},
'BatteryVoltage_V': {
'bins': [-np.inf, 450., 480., np.inf],
'labels': ['0_Low', '1_Medium', '2_High']
},
'InverterSpeed_RearLeft_RPM': {
'bins': [-np.inf, 0., 4000., np.inf],
'labels': ['0_Idle', '1_Normal_Pace', '2_High_Pace']
},
'InverterSpeed_RearRight_RPM': {
'bins': [-np.inf, 0., 4000., np.inf],
'labels': ['0_Idle', '1_Normal_Pace', '2_High_Pace']
},
'InverterTemp_RearLeft_C': {
'bins': [-np.inf, 20., 30., np.inf],
'labels': ['0_Low', '1_Normal', '2_High']
},
'InverterTemp_RearRight_C': {
'bins': [-np.inf, 20., 30., np.inf],
'labels': ['0_Low', '1_Normal', '2_High']
},
'Inverter_Id_Ref_RearLeft_A': {
'bins': [-np.inf, -4., -1., np.inf],
'labels': ['0_Very_Negative', '1_Negative', '2_Close_Zero']
},
'Inverter_Id_Ref_RearRight_A': {
'bins': [-np.inf, -4., -1., np.inf],
'labels': ['0_Very_Negative', '1_Negative', '2_Close_Zero']
},
'Inverter_Iq_Ref_RearRight_A': {
'bins': [-np.inf, -4, 4, np.inf],
'labels': ['0_Negative', '1_Close_Zero', '2_Positive']
},
'Inverter_Iq_Ref_RearLeft_A': {
'bins': [-np.inf, -4, 4, np.inf],
'labels': ['0_Negative', '1_Close_Zero', '2_Positive']
},
'MotorTemp_RearLeft_C': {
'bins': [-np.inf, 25., 55., np.inf],
'labels': ['0_Low', '1_Normal', '2_High']
},
'MotorTemp_RearRight_C': {
'bins': [-np.inf, 25., 55., np.inf],
'labels': ['0_Low', '1_Normal', '2_High']
},
}
🔷 Discretizing 20241128-imola dataset with cut¶
In [8]:
# Perform discretization on numeric columns using predefined bin configurations
discretization_info, discretized_imola_20241128_df = DataDiscretizer.discretize_columns(
df=imola_20241128_df,
columns_config=discretizion_columns_configs,
method='cut'
)
# Display summary of discretization results
print("\n📊 Discretization Overview:")
for column, info in discretization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Method: {info['method']}")
print(f" - Bin Edges: {info['bin_edges']}")
print(f" - Number of Bins: {info['bin_count']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Discretization Overview: 🔹 Column: BatteryCurrent_A - Method: Threshold-based - Bin Edges: [ -inf 0.09 20. inf] - Number of Bins: 3 - Value Counts: BatteryCurrent_A 0_Idle 103222 1_Normal 99478 2_High 45748 🔹 Column: BatteryPackTemp_C - Method: Threshold-based - Bin Edges: [-inf 16. 28. inf] - Number of Bins: 3 - Value Counts: BatteryPackTemp_C 0_Low 44102 1_Normal 143430 2_High 60916 🔹 Column: BatteryVoltage_V - Method: Threshold-based - Bin Edges: [-inf 450. 480. inf] - Number of Bins: 3 - Value Counts: BatteryVoltage_V 0_Low 27047 1_Medium 139398 2_High 82003 🔹 Column: InverterSpeed_RearLeft_RPM - Method: Threshold-based - Bin Edges: [ -inf 0. 4000. inf] - Number of Bins: 3 - Value Counts: InverterSpeed_RearLeft_RPM 0_Idle 89932 1_Normal_Pace 65861 2_High_Pace 92655 🔹 Column: InverterSpeed_RearRight_RPM - Method: Threshold-based - Bin Edges: [ -inf 0. 4000. inf] - Number of Bins: 3 - Value Counts: InverterSpeed_RearRight_RPM 0_Idle 130287 1_Normal_Pace 24379 2_High_Pace 93782 🔹 Column: InverterTemp_RearLeft_C - Method: Threshold-based - Bin Edges: [-inf 20. 30. inf] - Number of Bins: 3 - Value Counts: InverterTemp_RearLeft_C 0_Low 21543 1_Normal 108051 2_High 118854 🔹 Column: InverterTemp_RearRight_C - Method: Threshold-based - Bin Edges: [-inf 20. 30. inf] - Number of Bins: 3 - Value Counts: InverterTemp_RearRight_C 0_Low 21442 1_Normal 104747 2_High 122259 🔹 Column: Inverter_Id_Ref_RearLeft_A - Method: Threshold-based - Bin Edges: [-inf -4. -1. inf] - Number of Bins: 3 - Value Counts: Inverter_Id_Ref_RearLeft_A 0_Very_Negative 33794 1_Negative 48612 2_Close_Zero 166042 🔹 Column: Inverter_Id_Ref_RearRight_A - Method: Threshold-based - Bin Edges: [-inf -4. -1. inf] - Number of Bins: 3 - Value Counts: Inverter_Id_Ref_RearRight_A 0_Very_Negative 29745 1_Negative 55072 2_Close_Zero 163631 🔹 Column: Inverter_Iq_Ref_RearRight_A - Method: Threshold-based - Bin Edges: [-inf -4. 4. inf] - Number of Bins: 3 - Value Counts: Inverter_Iq_Ref_RearRight_A 0_Negative 78422 1_Close_Zero 157497 2_Positive 12529 🔹 Column: Inverter_Iq_Ref_RearLeft_A - Method: Threshold-based - Bin Edges: [-inf -4. 4. inf] - Number of Bins: 3 - Value Counts: Inverter_Iq_Ref_RearLeft_A 0_Negative 8578 1_Close_Zero 159569 2_Positive 80301 🔹 Column: MotorTemp_RearLeft_C - Method: Threshold-based - Bin Edges: [-inf 25. 55. inf] - Number of Bins: 3 - Value Counts: MotorTemp_RearLeft_C 0_Low 31242 1_Normal 178111 2_High 39095 🔹 Column: MotorTemp_RearRight_C - Method: Threshold-based - Bin Edges: [-inf 25. 55. inf] - Number of Bins: 3 - Value Counts: MotorTemp_RearRight_C 0_Low 29742 1_Normal 162912 2_High 55794
In [9]:
# Perform discretization on numeric columns using predefined bin configurations
binarization_info, discretized_imola_20241128_df = DataDiscretizer.binarize_columns(
df=discretized_imola_20241128_df,
columns=['InverterFault'],
thresholds={'InverterFault': 0.5},
)
# Display summary of binarization results
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Threshold: {info['threshold']}")
print(f" - True (%): {info['percentage_true']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Binarization Overview: 🔹 Column: InverterFault - Threshold: 0.5 - True (%): 0.0 - Value Counts: InverterFault False 248448
In [10]:
# Display first few rows of discretized data
print("\n📋 Sample of Discretized Data:")
display(discretized_imola_20241128_df.head())
📋 Sample of Discretized Data:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 1 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 2 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 3 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 4 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
In [11]:
encoded_imola20241128_df, encoding_info = DataEncoder.encode_categorical_columns(
df=discretized_imola_20241128_df,
encoding_strategy='ordinal'
)
DataVisualizer.plot_discrete_distributions(
df=encoded_imola20241128_df,
fault_col='InverterFault'
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [12]:
# Define output file path
output_file = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'discr-20241128-imola.csv')
# Save the discretized dataset
DataProcessor.save_dataset(discretized_imola_20241128_df, output_file, file_format="csv")
print(f"✅ Saved discretized data to: {output_file}")
Dataset with shape (248448, 14), saved successfully at .\data\discretized-datasets-imola\discr-20241128-imola.csv (csv). ✅ Saved discretized data to: .\data\discretized-datasets-imola\discr-20241128-imola.csv
🔷 Discretizing 20250113-imola dataset with cut¶
In [13]:
# Perform discretization on numeric columns using predefined bin configurations
discretization_info, discretized_imola_20250113_df = DataDiscretizer.discretize_columns(
df=imola_20250113_df,
columns_config=discretizion_columns_configs,
method='cut'
)
# Display summary of discretization results
print("\n📊 Discretization Overview:")
for column, info in discretization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Method: {info['method']}")
print(f" - Bin Edges: {info['bin_edges']}")
print(f" - Number of Bins: {info['bin_count']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Discretization Overview: 🔹 Column: BatteryCurrent_A - Method: Threshold-based - Bin Edges: [ -inf 0.09 20. inf] - Number of Bins: 3 - Value Counts: BatteryCurrent_A 0_Idle 227235 1_Normal 219245 2_High 51073 🔹 Column: BatteryPackTemp_C - Method: Threshold-based - Bin Edges: [-inf 16. 28. inf] - Number of Bins: 3 - Value Counts: BatteryPackTemp_C 0_Low 268152 1_Normal 229401 2_High 0 🔹 Column: BatteryVoltage_V - Method: Threshold-based - Bin Edges: [-inf 450. 480. inf] - Number of Bins: 3 - Value Counts: BatteryVoltage_V 0_Low 141034 1_Medium 244765 2_High 111754 🔹 Column: InverterSpeed_RearLeft_RPM - Method: Threshold-based - Bin Edges: [ -inf 0. 4000. inf] - Number of Bins: 3 - Value Counts: InverterSpeed_RearLeft_RPM 0_Idle 147953 1_Normal_Pace 220363 2_High_Pace 129237 🔹 Column: InverterSpeed_RearRight_RPM - Method: Threshold-based - Bin Edges: [ -inf 0. 4000. inf] - Number of Bins: 3 - Value Counts: InverterSpeed_RearRight_RPM 0_Idle 213562 1_Normal_Pace 158162 2_High_Pace 125829 🔹 Column: InverterTemp_RearLeft_C - Method: Threshold-based - Bin Edges: [-inf 20. 30. inf] - Number of Bins: 3 - Value Counts: InverterTemp_RearLeft_C 0_Low 41664 1_Normal 369706 2_High 86183 🔹 Column: InverterTemp_RearRight_C - Method: Threshold-based - Bin Edges: [-inf 20. 30. inf] - Number of Bins: 3 - Value Counts: InverterTemp_RearRight_C 0_Low 32166 1_Normal 369644 2_High 95743 🔹 Column: Inverter_Id_Ref_RearLeft_A - Method: Threshold-based - Bin Edges: [-inf -4. -1. inf] - Number of Bins: 3 - Value Counts: Inverter_Id_Ref_RearLeft_A 0_Very_Negative 50226 1_Negative 90760 2_Close_Zero 356567 🔹 Column: Inverter_Id_Ref_RearRight_A - Method: Threshold-based - Bin Edges: [-inf -4. -1. inf] - Number of Bins: 3 - Value Counts: Inverter_Id_Ref_RearRight_A 0_Very_Negative 48531 1_Negative 92083 2_Close_Zero 356939 🔹 Column: Inverter_Iq_Ref_RearRight_A - Method: Threshold-based - Bin Edges: [-inf -4. 4. inf] - Number of Bins: 3 - Value Counts: Inverter_Iq_Ref_RearRight_A 0_Negative 155024 1_Close_Zero 341904 2_Positive 625 🔹 Column: Inverter_Iq_Ref_RearLeft_A - Method: Threshold-based - Bin Edges: [-inf -4. 4. inf] - Number of Bins: 3 - Value Counts: Inverter_Iq_Ref_RearLeft_A 0_Negative 751 1_Close_Zero 341385 2_Positive 155417 🔹 Column: MotorTemp_RearLeft_C - Method: Threshold-based - Bin Edges: [-inf 25. 55. inf] - Number of Bins: 3 - Value Counts: MotorTemp_RearLeft_C 0_Low 55209 1_Normal 440648 2_High 1696 🔹 Column: MotorTemp_RearRight_C - Method: Threshold-based - Bin Edges: [-inf 25. 55. inf] - Number of Bins: 3 - Value Counts: MotorTemp_RearRight_C 0_Low 50555 1_Normal 412212 2_High 34786
In [14]:
# Perform discretization on numeric columns using predefined bin configurations
binarization_info, discretized_imola_20250113_df = DataDiscretizer.binarize_columns(
df=discretized_imola_20250113_df,
columns=['InverterFault'],
thresholds={'InverterFault': 0.5},
)
# Display summary of binarization results
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Threshold: {info['threshold']}")
print(f" - True (%): {info['percentage_true']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Binarization Overview: 🔹 Column: InverterFault - Threshold: 0.5 - True (%): 11.620872550260977 - Value Counts: InverterFault False 439733 True 57820
In [15]:
# Display first few rows of discretized data
print("\n📋 Sample of Discretized Data:")
display(discretized_imola_20250113_df.head())
📋 Sample of Discretized Data:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 1 | 2_High | 0_Idle | 0_Low | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 2 | 2_High | 0_Idle | 0_Low | False | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 3 | 2_High | 0_Idle | 0_Low | False | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 4 | 2_High | 0_Idle | 0_Low | False | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 1_Normal_Pace | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
In [16]:
# Encode categorical columns using the specified encoding strategy
encoded_imola_20250113_df, _ = DataEncoder.encode_categorical_columns(
df=discretized_imola_20250113_df,
encoding_strategy='ordinal'
)
# Visualize the distribution of discrete variables with respect to 'InverterFault'
DataVisualizer.plot_discrete_distributions(
df=encoded_imola_20250113_df,
fault_col='InverterFault'
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [17]:
# Define output file path
output_file = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'discr-20250113-imola.csv')
# Save the discretized dataset
DataProcessor.save_dataset(discretized_imola_20250113_df, output_file, file_format="csv")
print(f"✅ Saved discretized data to: {output_file}")
Dataset with shape (497553, 14), saved successfully at .\data\discretized-datasets-imola\discr-20250113-imola.csv (csv). ✅ Saved discretized data to: .\data\discretized-datasets-imola\discr-20250113-imola.csv
🔷 Discretizing 20250114-imola dataset with cut¶
In [18]:
# Perform discretization on numeric columns using predefined bin configurations
discretization_info, discretized_imola_20250114_df = DataDiscretizer.discretize_columns(
df=imola_20250114_df,
columns_config=discretizion_columns_configs,
method='cut'
)
# Display summary of discretization results
print("\n📊 Discretization Overview:")
for column, info in discretization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Method: {info['method']}")
print(f" - Bin Edges: {info['bin_edges']}")
print(f" - Number of Bins: {info['bin_count']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Discretization Overview: 🔹 Column: BatteryCurrent_A - Method: Threshold-based - Bin Edges: [ -inf 0.09 20. inf] - Number of Bins: 3 - Value Counts: BatteryCurrent_A 0_Idle 365956 1_Normal 177326 2_High 60442 🔹 Column: BatteryPackTemp_C - Method: Threshold-based - Bin Edges: [-inf 16. 28. inf] - Number of Bins: 3 - Value Counts: BatteryPackTemp_C 0_Low 21071 1_Normal 240217 2_High 342436 🔹 Column: BatteryVoltage_V - Method: Threshold-based - Bin Edges: [-inf 450. 480. inf] - Number of Bins: 3 - Value Counts: BatteryVoltage_V 0_Low 48085 1_Medium 266979 2_High 288660 🔹 Column: InverterSpeed_RearLeft_RPM - Method: Threshold-based - Bin Edges: [ -inf 0. 4000. inf] - Number of Bins: 3 - Value Counts: InverterSpeed_RearLeft_RPM 0_Idle 220654 1_Normal_Pace 239224 2_High_Pace 143846 🔹 Column: InverterSpeed_RearRight_RPM - Method: Threshold-based - Bin Edges: [ -inf 0. 4000. inf] - Number of Bins: 3 - Value Counts: InverterSpeed_RearRight_RPM 0_Idle 322972 1_Normal_Pace 139467 2_High_Pace 141285 🔹 Column: InverterTemp_RearLeft_C - Method: Threshold-based - Bin Edges: [-inf 20. 30. inf] - Number of Bins: 3 - Value Counts: InverterTemp_RearLeft_C 0_Low 67147 1_Normal 402530 2_High 134047 🔹 Column: InverterTemp_RearRight_C - Method: Threshold-based - Bin Edges: [-inf 20. 30. inf] - Number of Bins: 3 - Value Counts: InverterTemp_RearRight_C 0_Low 56755 1_Normal 391176 2_High 155793 🔹 Column: Inverter_Id_Ref_RearLeft_A - Method: Threshold-based - Bin Edges: [-inf -4. -1. inf] - Number of Bins: 3 - Value Counts: Inverter_Id_Ref_RearLeft_A 0_Very_Negative 58818 1_Negative 72005 2_Close_Zero 472901 🔹 Column: Inverter_Id_Ref_RearRight_A - Method: Threshold-based - Bin Edges: [-inf -4. -1. inf] - Number of Bins: 3 - Value Counts: Inverter_Id_Ref_RearRight_A 0_Very_Negative 57328 1_Negative 73730 2_Close_Zero 472666 🔹 Column: Inverter_Iq_Ref_RearRight_A - Method: Threshold-based - Bin Edges: [-inf -4. 4. inf] - Number of Bins: 3 - Value Counts: Inverter_Iq_Ref_RearRight_A 0_Negative 140283 1_Close_Zero 462711 2_Positive 730 🔹 Column: Inverter_Iq_Ref_RearLeft_A - Method: Threshold-based - Bin Edges: [-inf -4. 4. inf] - Number of Bins: 3 - Value Counts: Inverter_Iq_Ref_RearLeft_A 0_Negative 823 1_Close_Zero 462841 2_Positive 140060 🔹 Column: MotorTemp_RearLeft_C - Method: Threshold-based - Bin Edges: [-inf 25. 55. inf] - Number of Bins: 3 - Value Counts: MotorTemp_RearLeft_C 0_Low 112472 1_Normal 469609 2_High 21643 🔹 Column: MotorTemp_RearRight_C - Method: Threshold-based - Bin Edges: [-inf 25. 55. inf] - Number of Bins: 3 - Value Counts: MotorTemp_RearRight_C 0_Low 84316 1_Normal 483568 2_High 35840
In [19]:
# Perform discretization on numeric columns using predefined bin configurations
binarization_info, discretized_imola_20250114_df = DataDiscretizer.binarize_columns(
df=discretized_imola_20250114_df,
columns=['InverterFault'],
thresholds={'InverterFault': 0.5},
)
# Display summary of binarization results
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Threshold: {info['threshold']}")
print(f" - True (%): {info['percentage_true']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Binarization Overview: 🔹 Column: InverterFault - Threshold: 0.5 - True (%): 12.56931975538491 - Value Counts: InverterFault False 527840 True 75884
In [20]:
# Display first few rows of discretized data
print("\n📋 Sample of Discretized Data:")
display(discretized_imola_20250114_df.head())
📋 Sample of Discretized Data:
| BatteryVoltage_V | BatteryCurrent_A | BatteryPackTemp_C | InverterFault | InverterSpeed_RearLeft_RPM | Inverter_Iq_Ref_RearLeft_A | Inverter_Id_Ref_RearLeft_A | MotorTemp_RearLeft_C | InverterTemp_RearLeft_C | InverterSpeed_RearRight_RPM | Inverter_Iq_Ref_RearRight_A | Inverter_Id_Ref_RearRight_A | MotorTemp_RearRight_C | InverterTemp_RearRight_C | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2_High | 0_Idle | 1_Normal | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 1 | 2_High | 0_Idle | 1_Normal | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 2 | 2_High | 0_Idle | 1_Normal | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 3 | 2_High | 0_Idle | 1_Normal | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
| 4 | 2_High | 0_Idle | 1_Normal | False | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low | 0_Idle | 1_Close_Zero | 2_Close_Zero | 0_Low | 0_Low |
In [21]:
# Encode categorical columns using the specified encoding strategy
encoded_imola_20250114_df, _ = DataEncoder.encode_categorical_columns(
df=discretized_imola_20250114_df,
encoding_strategy='ordinal'
)
# Visualize the distribution of discrete variables with respect to 'InverterFault'
DataVisualizer.plot_discrete_distributions(
df=encoded_imola_20250114_df,
fault_col='InverterFault'
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [22]:
# Define output file path
output_file = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'discr-20250114-imola.csv')
# Save the discretized dataset
DataProcessor.save_dataset(discretized_imola_20250114_df, output_file, file_format="csv")
print(f"✅ Saved discretized data to: {output_file}")
Dataset with shape (603724, 14), saved successfully at .\data\discretized-datasets-imola\discr-20250114-imola.csv (csv). ✅ Saved discretized data to: .\data\discretized-datasets-imola\discr-20250114-imola.csv
🍡 Unsupervised discretization with KMeans method¶
Define the number of bins for each feature
In [30]:
# Define the number of bins for each feature (K-Means discretization)
kmeans_bins = {
'BatteryVoltage_V': 3,
'BatteryCurrent_A': 3,
'BatteryPackTemp_C': 4,
'InverterSpeed_RearLeft_RPM': 6,
'Inverter_Iq_Ref_RearLeft_A': 3,
'Inverter_Id_Ref_RearLeft_A': 6,
'MotorTemp_RearLeft_C': 4,
'InverterTemp_RearLeft_C': 7,
'InverterSpeed_RearRight_RPM': 6,
'Inverter_Iq_Ref_RearRight_A': 3,
'Inverter_Id_Ref_RearRight_A': 6,
'MotorTemp_RearRight_C': 4,
'InverterTemp_RearRight_C': 7,
}
🔷 Discretizing 20241128-imola dataset with KMeans¶
In [31]:
# Perform K-Means discretization
kmeans_discretized_imola_20241128_df, kmeans_result = DataDiscretizer.discretize_kmeans(
df=imola_20241128_df.drop(columns=['InverterFault']),
n_bins=list(kmeans_bins.values()) # Extract bin values in correct order
)
# Display K-Means discretization results in a readable format
print("\n" + "="*80)
print("🔍 K-Means Discretization Summary")
print("="*80)
for col, details in kmeans_result.items():
print(f"\n📌 Feature: {col}")
print("-" * 60)
print(f" 📊 Method: {details['method']}")
print(f" 🔢 Number of Bins: {details['bin_count']}")
print(f" 📍 Bin Edges: {details['bin_edges']}")
print(f" 🎯 Cluster Centers: {details['cluster_centers']}")
print(f" 📈 Value Counts: {details['value_counts']}")
print(f" 🔄 Mapping: {details['mapping']}")
print("-" * 60)
Processing column: BatteryVoltage_V
BatteryVoltage_V: Using provided n_bins: 3
BatteryVoltage_V discretized into 3 bins.
Processing column: BatteryCurrent_A
BatteryCurrent_A: Using provided n_bins: 3
BatteryCurrent_A discretized into 3 bins.
Processing column: BatteryPackTemp_C
BatteryPackTemp_C: Using provided n_bins: 4
BatteryPackTemp_C discretized into 4 bins.
Processing column: InverterSpeed_RearLeft_RPM
InverterSpeed_RearLeft_RPM: Using provided n_bins: 6
InverterSpeed_RearLeft_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearLeft_A
Inverter_Iq_Ref_RearLeft_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearLeft_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearLeft_A
Inverter_Id_Ref_RearLeft_A: Using provided n_bins: 6
Inverter_Id_Ref_RearLeft_A discretized into 6 bins.
Processing column: MotorTemp_RearLeft_C
MotorTemp_RearLeft_C: Using provided n_bins: 4
MotorTemp_RearLeft_C discretized into 4 bins.
Processing column: InverterTemp_RearLeft_C
InverterTemp_RearLeft_C: Using provided n_bins: 7
InverterTemp_RearLeft_C discretized into 7 bins.
Processing column: InverterSpeed_RearRight_RPM
InverterSpeed_RearRight_RPM: Using provided n_bins: 6
InverterSpeed_RearRight_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearRight_A
Inverter_Iq_Ref_RearRight_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearRight_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearRight_A
Inverter_Id_Ref_RearRight_A: Using provided n_bins: 6
Inverter_Id_Ref_RearRight_A discretized into 6 bins.
Processing column: MotorTemp_RearRight_C
MotorTemp_RearRight_C: Using provided n_bins: 4
MotorTemp_RearRight_C discretized into 4 bins.
Processing column: InverterTemp_RearRight_C
InverterTemp_RearRight_C: Using provided n_bins: 7
InverterTemp_RearRight_C discretized into 7 bins.
================================================================================
🔍 K-Means Discretization Summary
================================================================================
📌 Feature: BatteryVoltage_V
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [413.5, 457.6799926757813, 479.6400146484375, 510.7799987792969]
🎯 Cluster Centers: [450.8841660643744, 493.16844684195354, 470.7280725710292]
📈 Value Counts: {0: 94204, 1: 77240, 2: 77004}
🔄 Mapping: {0: (413.5, 457.6799926757813), 1: (457.6799926757813, 479.6400146484375), 2: (479.6400146484375, 510.7799987792969)}
------------------------------------------------------------
📌 Feature: BatteryCurrent_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-36.68999862670898, 0.0599999986588954, 0.6200000047686626, 115.23999786376952]
🎯 Cluster Centers: [0.5426547254437413, 30.27464127164916, 77.05537592619686]
📈 Value Counts: {0: 194991, 1: 42069, 2: 11388}
🔄 Mapping: {0: (-36.68999862670898, 0.0599999986588954), 1: (0.0599999986588954, 0.6200000047686626), 2: (0.6200000047686626, 115.23999786376952)}
------------------------------------------------------------
📌 Feature: BatteryPackTemp_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [13.75, 16.75, 21.959999084472656, 27.81999969482422, 29.68000030517578]
🎯 Cluster Centers: [28.616491927578554, 15.3777839255211, 24.45629642325104, 20.78825607443877]
📈 Value Counts: {0: 82233, 1: 81874, 2: 36646, 3: 47695}
🔄 Mapping: {0: (13.75, 16.75), 1: (16.75, 21.959999084472656), 2: (21.959999084472656, 27.81999969482422), 3: (27.81999969482422, 29.68000030517578)}
------------------------------------------------------------
📌 Feature: InverterSpeed_RearLeft_RPM
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-76.0, -1.0, 0.0, 3.0, 4769.0, 6867.0, 14881.0]
🎯 Cluster Centers: [20.62962072867913, 6629.352946016759, 2974.976158847773, 10430.721695374417, 4995.9910128912, 8034.712264545499]
📈 Value Counts: {0: 141459, 1: 36646, 2: 14210, 3: 5106, 4: 27153, 5: 23874}
🔄 Mapping: {0: (-76.0, -1.0), 1: (-1.0, 0.0), 2: (0.0, 3.0), 3: (3.0, 4769.0), 4: (4769.0, 6867.0), 5: (6867.0, 14881.0)}
------------------------------------------------------------
📌 Feature: Inverter_Iq_Ref_RearLeft_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-26.75, 0.0, 2.8125, 52.0]
🎯 Cluster Centers: [-0.03620901408857424, 37.62631716450713, 17.729650426414203]
📈 Value Counts: {0: 178110, 1: 22966, 2: 47372}
🔄 Mapping: {0: (-26.75, 0.0), 1: (0.0, 2.8125), 2: (2.8125, 52.0)}
------------------------------------------------------------
📌 Feature: Inverter_Id_Ref_RearLeft_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-49.8125, -3.4375, -0.9375, 0.0, 0.0, 0.0, 0.0]
🎯 Cluster Centers: [-0.10410681716289272, -8.536405968033035, -4.8914872472839095, -35.04382313231325, -20.93918398768281, -2.425968266831328]
📈 Value Counts: {0: 171611, 1: 7758, 2: 27798, 3: 1111, 4: 1299, 5: 38871}
🔄 Mapping: {0: (-49.8125, -3.4375), 1: (-3.4375, -0.9375), 2: (-0.9375, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------
📌 Feature: MotorTemp_RearLeft_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [12.0, 32.0, 39.0, 49.0, 76.0]
🎯 Cluster Centers: [33.73333615158966, 63.31082826929664, 13.909595926208517, 45.822453446143655]
📈 Value Counts: {0: 94621, 1: 41798, 2: 30242, 3: 81787}
🔄 Mapping: {0: (12.0, 32.0), 1: (32.0, 39.0), 2: (39.0, 49.0), 3: (49.0, 76.0)}
------------------------------------------------------------
📌 Feature: InverterTemp_RearLeft_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 7
📍 Bin Edges: [15.0, 24.0, 28.0, 29.0, 31.0, 33.0, 34.0, 45.0]
🎯 Cluster Centers: [28.29360564341743, 33.80628629996717, 16.793900570946256, 24.46665411207726, 41.71512081060014, 30.985814302383712, 37.24841229286772]
📈 Value Counts: {0: 52734, 1: 55613, 2: 21543, 3: 37171, 4: 7698, 5: 59990, 6: 13699}
🔄 Mapping: {0: (15.0, 24.0), 1: (24.0, 28.0), 2: (28.0, 29.0), 3: (29.0, 31.0), 4: (31.0, 33.0), 5: (33.0, 34.0), 6: (34.0, 45.0)}
------------------------------------------------------------
📌 Feature: InverterSpeed_RearRight_RPM
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-11.0, 0.0, 0.0, 0.0, 4794.0, 6693.0, 12655.0]
🎯 Cluster Centers: [18.35260185393554, 7562.477915137873, 6057.463134942713, 2586.238049940551, 9584.833135938898, 4453.473028507175]
📈 Value Counts: {0: 141088, 1: 31002, 2: 37179, 3: 8321, 4: 7599, 5: 23259}
🔄 Mapping: {0: (-11.0, 0.0), 1: (0.0, 0.0), 2: (0.0, 0.0), 3: (0.0, 4794.0), 4: (4794.0, 6693.0), 5: (6693.0, 12655.0)}
------------------------------------------------------------
📌 Feature: Inverter_Iq_Ref_RearRight_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-50.5, -1.5625, 0.0, 32.875]
🎯 Cluster Centers: [0.3667427478488543, -16.387898260201688, -34.52940134037582]
📈 Value Counts: {0: 178154, 1: 47017, 2: 23277}
🔄 Mapping: {0: (-50.5, -1.5625), 1: (-1.5625, 0.0), 2: (0.0, 32.875)}
------------------------------------------------------------
📌 Feature: Inverter_Id_Ref_RearRight_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-45.375, -3.1875, -1.0625, 0.0, 0.0, 0.0, 0.0]
🎯 Cluster Centers: [-2.194971661268907, -0.08769584661903052, -31.31338304552601, -4.467119527206286, -18.326174242424344, -7.674653057119889]
📈 Value Counts: {0: 41992, 1: 167478, 2: 1274, 3: 28596, 4: 1650, 5: 7458}
🔄 Mapping: {0: (-45.375, -3.1875), 1: (-3.1875, -1.0625), 2: (-1.0625, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------
📌 Feature: MotorTemp_RearRight_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [13.0, 34.0, 42.0, 54.0, 80.0]
🎯 Cluster Centers: [50.81639795517266, 36.31441086634455, 68.33492622608333, 14.891567480341838]
📈 Value Counts: {0: 76290, 1: 101819, 2: 40597, 3: 29742}
🔄 Mapping: {0: (13.0, 34.0), 1: (34.0, 42.0), 2: (42.0, 54.0), 3: (54.0, 80.0)}
------------------------------------------------------------
📌 Feature: InverterTemp_RearRight_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 7
📍 Bin Edges: [15.0, 24.0, 28.0, 29.0, 31.0, 33.0, 34.0, 44.0]
🎯 Cluster Centers: [40.15338860850781, 30.95595754944509, 24.60902791639487, 15.345998848588128, 34.208817829458326, 28.27070743333499, 19.113807704225827]
📈 Value Counts: {0: 11096, 1: 62190, 2: 35069, 3: 12159, 4: 68112, 5: 50139, 6: 9683}
🔄 Mapping: {0: (15.0, 24.0), 1: (24.0, 28.0), 2: (28.0, 29.0), 3: (29.0, 31.0), 4: (31.0, 33.0), 5: (33.0, 34.0), 6: (34.0, 44.0)}
------------------------------------------------------------
In [33]:
# Perform discretization on numeric columns using predefined bin configurations
binarization_info, binarized_fault_df = DataDiscretizer.binarize_columns(
df=imola_20241128_df['InverterFault'].to_frame(),
columns=['InverterFault'],
thresholds={'InverterFault': 0.5},
labels={'InverterFault': [0, 1]}
)
# Add the binzarized column to discretized dataframe
kmeans_discretized_imola_20241128_df['InverterFault'] = binarized_fault_df['InverterFault']
# Display summary of binarization results
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Threshold: {info['threshold']}")
print(f" - True (%): {info['percentage_true']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Binarization Overview: 🔹 Column: InverterFault - Threshold: 0.5 - True (%): 0.0 - Value Counts: InverterFault 0 248448
In [34]:
# Encode categorical columns using the specified encoding strategy
encoded_imola_20241128_df, _ = DataEncoder.encode_categorical_columns(
df=kmeans_discretized_imola_20241128_df,
encoding_strategy='ordinal'
)
# Visualize the distribution of discrete variables with respect to 'InverterFault'
DataVisualizer.plot_discrete_distributions(
df=encoded_imola_20241128_df,
fault_col='InverterFault'
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [35]:
# Define output file path
output_file = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'kmeans', 'discr-20241128-imola.csv')
# Save the discretized dataset
DataProcessor.save_dataset(kmeans_discretized_imola_20241128_df, output_file, file_format="csv")
print(f"✅ Saved discretized (KMeans) data to: {output_file}")
Dataset with shape (248448, 14), saved successfully at .\data\discretized-datasets-imola\kmeans\discr-20241128-imola.csv (csv). ✅ Saved discretized (KMeans) data to: .\data\discretized-datasets-imola\kmeans\discr-20241128-imola.csv
🔷 Discretizing 20250113-imola dataset with KMeans¶
In [36]:
# Perform K-Means discretization
kmeans_discretized_imola_20250113_df, kmeans_result = DataDiscretizer.discretize_kmeans(
df=imola_20250113_df.drop(columns=['InverterFault']),
n_bins=list(kmeans_bins.values()) # Extract bin values in correct order
)
# Display K-Means discretization results in a readable format
print("\n" + "="*80)
print("🔍 K-Means Discretization Summary")
print("="*80)
for col, details in kmeans_result.items():
print(f"\n📌 Feature: {col}")
print("-" * 60)
print(f" 📊 Method: {details['method']}")
print(f" 🔢 Number of Bins: {details['bin_count']}")
print(f" 📍 Bin Edges: {details['bin_edges']}")
print(f" 🎯 Cluster Centers: {details['cluster_centers']}")
print(f" 📈 Value Counts: {details['value_counts']}")
print(f" 🔄 Mapping: {details['mapping']}")
print("-" * 60)
Processing column: BatteryVoltage_V
BatteryVoltage_V: Using provided n_bins: 3
BatteryVoltage_V discretized into 3 bins.
Processing column: BatteryCurrent_A
BatteryCurrent_A: Using provided n_bins: 3
BatteryCurrent_A discretized into 3 bins.
Processing column: BatteryPackTemp_C
BatteryPackTemp_C: Using provided n_bins: 4
BatteryPackTemp_C discretized into 4 bins.
Processing column: InverterSpeed_RearLeft_RPM
InverterSpeed_RearLeft_RPM: Using provided n_bins: 6
InverterSpeed_RearLeft_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearLeft_A
Inverter_Iq_Ref_RearLeft_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearLeft_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearLeft_A
Inverter_Id_Ref_RearLeft_A: Using provided n_bins: 6
Inverter_Id_Ref_RearLeft_A discretized into 6 bins.
Processing column: MotorTemp_RearLeft_C
MotorTemp_RearLeft_C: Using provided n_bins: 4
MotorTemp_RearLeft_C discretized into 4 bins.
Processing column: InverterTemp_RearLeft_C
InverterTemp_RearLeft_C: Using provided n_bins: 7
InverterTemp_RearLeft_C discretized into 7 bins.
Processing column: InverterSpeed_RearRight_RPM
InverterSpeed_RearRight_RPM: Using provided n_bins: 6
InverterSpeed_RearRight_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearRight_A
Inverter_Iq_Ref_RearRight_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearRight_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearRight_A
Inverter_Id_Ref_RearRight_A: Using provided n_bins: 6
Inverter_Id_Ref_RearRight_A discretized into 6 bins.
Processing column: MotorTemp_RearRight_C
MotorTemp_RearRight_C: Using provided n_bins: 4
MotorTemp_RearRight_C discretized into 4 bins.
Processing column: InverterTemp_RearRight_C
InverterTemp_RearRight_C: Using provided n_bins: 7
InverterTemp_RearRight_C discretized into 7 bins.
================================================================================
🔍 K-Means Discretization Summary
================================================================================
📌 Feature: BatteryVoltage_V
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [385.8200073242188, 453.8200073242188, 473.8800048828125, 503.1000061035156]
🎯 Cluster Centers: [463.57189172733916, 486.486566417157, 438.74000215180104]
📈 Value Counts: {0: 191299, 1: 158915, 2: 147339}
🔄 Mapping: {0: (385.8200073242188, 453.8200073242188), 1: (453.8200073242188, 473.8800048828125), 2: (473.8800048828125, 503.1000061035156)}
------------------------------------------------------------
📌 Feature: BatteryCurrent_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-13.5, 0.0299999993294477, 1.090000033378601, 115.79000091552734]
🎯 Cluster Centers: [1.1704989269415709, 25.64220777929068, 68.675531492679]
📈 Value Counts: {0: 426140, 1: 55553, 2: 15860}
🔄 Mapping: {0: (-13.5, 0.0299999993294477), 1: (0.0299999993294477, 1.090000033378601), 2: (1.090000033378601, 115.79000091552734)}
------------------------------------------------------------
📌 Feature: BatteryPackTemp_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [5.190000057220459, 11.489999771118164, 15.729999542236328, 19.270000457763672, 23.63999938964844]
🎯 Cluster Centers: [16.574878316087663, 7.251553135798417, 12.646073472343842, 20.727677360132574]
📈 Value Counts: {0: 155692, 1: 90482, 2: 118091, 3: 133288}
🔄 Mapping: {0: (5.190000057220459, 11.489999771118164), 1: (11.489999771118164, 15.729999542236328), 2: (15.729999542236328, 19.270000457763672), 3: (19.270000457763672, 23.63999938964844)}
------------------------------------------------------------
📌 Feature: InverterSpeed_RearLeft_RPM
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-1439.0, -1.0, 1.0, 1748.0, 3440.0, 4773.0, 14967.0]
🎯 Cluster Centers: [6145.221053681564, 23.614864468717315, 3502.5129763055565, 2254.090649862672, 8847.434798270768, 4735.365005369279]
📈 Value Counts: {0: 39808, 1: 239874, 2: 78946, 3: 56930, 4: 8245, 5: 73750}
🔄 Mapping: {0: (-1439.0, -1.0), 1: (-1.0, 1.0), 2: (1.0, 1748.0), 3: (1748.0, 3440.0), 4: (3440.0, 4773.0), 5: (4773.0, 14967.0)}
------------------------------------------------------------
📌 Feature: Inverter_Iq_Ref_RearLeft_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-7.0625, 0.0, 2.4375, 52.0]
🎯 Cluster Centers: [0.660740561115186, 16.28424835630546, 37.76631710856252]
📈 Value Counts: {0: 378885, 1: 80763, 2: 37905}
🔄 Mapping: {0: (-7.0625, 0.0), 1: (0.0, 2.4375), 2: (2.4375, 52.0)}
------------------------------------------------------------
📌 Feature: Inverter_Id_Ref_RearLeft_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-52.9375, -2.5625, -0.5, 0.0, 0.0, 0.0, 0.0]
🎯 Cluster Centers: [-0.08436387709837745, -4.577186134785373, -19.766811389337715, -2.091052557345818, -7.734198063287585, -33.64936440677975]
📈 Value Counts: {0: 362147, 1: 41889, 2: 1238, 3: 72673, 4: 18898, 5: 708}
🔄 Mapping: {0: (-52.9375, -2.5625), 1: (-2.5625, -0.5), 2: (-0.5, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------
📌 Feature: MotorTemp_RearLeft_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [7.0, 29.0, 33.0, 42.0, 57.0]
🎯 Cluster Centers: [37.20963659214347, 10.808418868734584, 46.09639515312672, 29.257080727488095]
📈 Value Counts: {0: 112986, 1: 36632, 2: 125359, 3: 222576}
🔄 Mapping: {0: (7.0, 29.0), 1: (29.0, 33.0), 2: (33.0, 42.0), 3: (42.0, 57.0)}
------------------------------------------------------------
📌 Feature: InverterTemp_RearLeft_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 7
📍 Bin Edges: [0.0, 22.0, 24.0, 25.0, 25.0, 28.0, 32.0, 45.0]
🎯 Cluster Centers: [24.754142788583945, 37.25794094174108, 11.042696494079383, 31.75548726953521, 22.37958086758254, 27.918712756767647, 19.087277811067025]
📈 Value Counts: {0: 212538, 1: 37590, 2: 16629, 3: 63784, 4: 73676, 5: 68301, 6: 25035}
🔄 Mapping: {0: (0.0, 22.0), 1: (22.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 25.0), 4: (25.0, 28.0), 5: (28.0, 32.0), 6: (32.0, 45.0)}
------------------------------------------------------------
📌 Feature: InverterSpeed_RearRight_RPM
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-573.0, 0.0, 0.0, 1740.0, 3412.0, 4670.0, 10113.0]
🎯 Cluster Centers: [3404.440648373218, 24.222514019922983, 5969.147318248084, 2178.6841410450065, 4613.262369869755, 8172.621650705116]
📈 Value Counts: {0: 80440, 1: 239192, 2: 40866, 3: 52637, 4: 75064, 5: 9354}
🔄 Mapping: {0: (-573.0, 0.0), 1: (0.0, 0.0), 2: (0.0, 1740.0), 3: (1740.0, 3412.0), 4: (3412.0, 4670.0), 5: (4670.0, 10113.0)}
------------------------------------------------------------
📌 Feature: Inverter_Iq_Ref_RearRight_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-50.5, -2.5, 0.0, 6.875]
🎯 Cluster Centers: [-0.6219269492611454, -36.66034967066401, -15.67274023674117]
📈 Value Counts: {0: 377231, 1: 38715, 2: 81607}
🔄 Mapping: {0: (-50.5, -2.5), 1: (-2.5, 0.0), 2: (0.0, 6.875)}
------------------------------------------------------------
📌 Feature: Inverter_Id_Ref_RearRight_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-45.9375, -2.5, -0.4375, 0.0, 0.0, 0.0, 0.0]
🎯 Cluster Centers: [-0.07881202377394536, -7.499628380124779, -20.086969111969122, -2.0071903333557284, -34.17778649921508, -4.423834872734697]
📈 Value Counts: {0: 360386, 1: 19341, 2: 1295, 3: 73345, 4: 637, 5: 42549}
🔄 Mapping: {0: (-45.9375, -2.5), 1: (-2.5, -0.4375), 2: (-0.4375, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------
📌 Feature: MotorTemp_RearRight_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [9.0, 30.0, 36.0, 47.0, 67.0]
🎯 Cluster Centers: [40.664866271368155, 12.336165048540362, 52.94671602711386, 30.455701838373397]
📈 Value Counts: {0: 126824, 1: 35432, 2: 126417, 3: 208880}
🔄 Mapping: {0: (9.0, 30.0), 1: (30.0, 36.0), 2: (36.0, 47.0), 3: (47.0, 67.0)}
------------------------------------------------------------
📌 Feature: InverterTemp_RearRight_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 7
📍 Bin Edges: [0.0, 23.0, 24.0, 25.0, 27.0, 28.0, 32.0, 46.0]
🎯 Cluster Centers: [33.33836957363041, 19.69597027058333, 25.8265265627967, 11.857350432747063, 23.40151820011179, 37.765118809449774, 29.168481139930567]
📈 Value Counts: {0: 51692, 1: 25833, 2: 173871, 3: 15829, 4: 109340, 5: 28491, 6: 92497}
🔄 Mapping: {0: (0.0, 23.0), 1: (23.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 27.0), 4: (27.0, 28.0), 5: (28.0, 32.0), 6: (32.0, 46.0)}
------------------------------------------------------------
In [37]:
# Perform discretization on numeric columns using predefined bin configurations
binarization_info, binarized_fault_df = DataDiscretizer.binarize_columns(
df=imola_20250113_df['InverterFault'].to_frame(),
columns=['InverterFault'],
thresholds={'InverterFault': 0.5},
labels={'InverterFault': [0, 1]}
)
# Add the binzarized column to discretized dataframe
kmeans_discretized_imola_20250113_df['InverterFault'] = binarized_fault_df['InverterFault']
# Display summary of binarization results
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Threshold: {info['threshold']}")
print(f" - True (%): {info['percentage_true']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Binarization Overview: 🔹 Column: InverterFault - Threshold: 0.5 - True (%): 11.620872550260977 - Value Counts: InverterFault 0 439733 1 57820
In [38]:
# Encode categorical columns using the specified encoding strategy
encoded_imola_20250113_df, _ = DataEncoder.encode_categorical_columns(
df=kmeans_discretized_imola_20250113_df,
encoding_strategy='ordinal'
)
# Visualize the distribution of discrete variables with respect to 'InverterFault'
DataVisualizer.plot_discrete_distributions(
df=encoded_imola_20250113_df,
fault_col='InverterFault'
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting. INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
In [39]:
# Define output file path
output_file = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'kmeans', 'discr-20250113-imola.csv')
# Save the discretized dataset
DataProcessor.save_dataset(kmeans_discretized_imola_20250113_df, output_file, file_format="csv")
print(f"✅ Saved discretized (KMeans) data to: {output_file}")
Dataset with shape (497553, 14), saved successfully at .\data\discretized-datasets-imola\kmeans\discr-20250113-imola.csv (csv). ✅ Saved discretized (KMeans) data to: .\data\discretized-datasets-imola\kmeans\discr-20250113-imola.csv
🔷 Discretizing 20250114-imola dataset with KMeans¶
In [40]:
# Perform K-Means discretization
kmeans_discretized_imola_20250114_df, kmeans_result = DataDiscretizer.discretize_kmeans(
df=imola_20250114_df.drop(columns=['InverterFault']),
n_bins=list(kmeans_bins.values()) # Extract bin values in correct order
)
# Display K-Means discretization results in a readable format
print("\n" + "="*80)
print("🔍 K-Means Discretization Summary")
print("="*80)
for col, details in kmeans_result.items():
print(f"\n📌 Feature: {col}")
print("-" * 60)
print(f" 📊 Method: {details['method']}")
print(f" 🔢 Number of Bins: {details['bin_count']}")
print(f" 📍 Bin Edges: {details['bin_edges']}")
print(f" 🎯 Cluster Centers: {details['cluster_centers']}")
print(f" 📈 Value Counts: {details['value_counts']}")
print(f" 🔄 Mapping: {details['mapping']}")
print("-" * 60)
Processing column: BatteryVoltage_V
BatteryVoltage_V: Using provided n_bins: 3
BatteryVoltage_V discretized into 3 bins.
Processing column: BatteryCurrent_A
BatteryCurrent_A: Using provided n_bins: 3
BatteryCurrent_A discretized into 3 bins.
Processing column: BatteryPackTemp_C
BatteryPackTemp_C: Using provided n_bins: 4
BatteryPackTemp_C discretized into 4 bins.
Processing column: InverterSpeed_RearLeft_RPM
InverterSpeed_RearLeft_RPM: Using provided n_bins: 6
InverterSpeed_RearLeft_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearLeft_A
Inverter_Iq_Ref_RearLeft_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearLeft_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearLeft_A
Inverter_Id_Ref_RearLeft_A: Using provided n_bins: 6
Inverter_Id_Ref_RearLeft_A discretized into 6 bins.
Processing column: MotorTemp_RearLeft_C
MotorTemp_RearLeft_C: Using provided n_bins: 4
MotorTemp_RearLeft_C discretized into 4 bins.
Processing column: InverterTemp_RearLeft_C
InverterTemp_RearLeft_C: Using provided n_bins: 7
InverterTemp_RearLeft_C discretized into 7 bins.
Processing column: InverterSpeed_RearRight_RPM
InverterSpeed_RearRight_RPM: Using provided n_bins: 6
InverterSpeed_RearRight_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearRight_A
Inverter_Iq_Ref_RearRight_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearRight_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearRight_A
Inverter_Id_Ref_RearRight_A: Using provided n_bins: 6
Inverter_Id_Ref_RearRight_A discretized into 6 bins.
Processing column: MotorTemp_RearRight_C
MotorTemp_RearRight_C: Using provided n_bins: 4
MotorTemp_RearRight_C discretized into 4 bins.
Processing column: InverterTemp_RearRight_C
InverterTemp_RearRight_C: Using provided n_bins: 7
InverterTemp_RearRight_C discretized into 7 bins.
================================================================================
🔍 K-Means Discretization Summary
================================================================================
📌 Feature: BatteryVoltage_V
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [405.3999938964844, 465.5, 485.260009765625, 503.260009765625]
🎯 Cluster Centers: [463.01402500742046, 487.8670697842577, 443.71776983770343]
📈 Value Counts: {0: 205420, 1: 342734, 2: 55570}
🔄 Mapping: {0: (405.3999938964844, 465.5), 1: (465.5, 485.260009765625), 2: (485.260009765625, 503.260009765625)}
------------------------------------------------------------
📌 Feature: BatteryCurrent_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-14.93000030517578, 0.0299999993294477, 0.0900000035762786, 116.33999633789062]
🎯 Cluster Centers: [0.6347228325262977, 70.11582684037796, 26.173025176113846]
📈 Value Counts: {0: 522464, 1: 17234, 2: 64026}
🔄 Mapping: {0: (-14.93000030517578, 0.0299999993294477), 1: (0.0299999993294477, 0.0900000035762786), 2: (0.0900000035762786, 116.33999633789062)}
------------------------------------------------------------
📌 Feature: BatteryPackTemp_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [15.100000381469728, 18.950000762939453, 28.540000915527344, 30.729999542236328, 33.150001525878906]
🎯 Cluster Centers: [16.650820800408827, 31.10757045999683, 28.16873758727166, 18.710720547811018]
📈 Value Counts: {0: 109826, 1: 212408, 2: 221648, 3: 59842}
🔄 Mapping: {0: (15.100000381469728, 18.950000762939453), 1: (18.950000762939453, 28.540000915527344), 2: (28.540000915527344, 30.729999542236328), 3: (30.729999542236328, 33.150001525878906)}
------------------------------------------------------------
📌 Feature: InverterSpeed_RearLeft_RPM
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-326.0, -1.0, 0.0, 3.0, 2851.0, 4808.0, 15312.0]
🎯 Cluster Centers: [29.16145619705503, 4985.88094842793, 2209.337123656471, 9058.826691290482, 3660.076954563566, 6397.409932015105]
📈 Value Counts: {0: 360934, 1: 72128, 2: 44864, 3: 10359, 4: 70790, 5: 44649}
🔄 Mapping: {0: (-326.0, -1.0), 1: (-1.0, 0.0), 2: (0.0, 3.0), 3: (3.0, 2851.0), 4: (2851.0, 4808.0), 5: (4808.0, 15312.0)}
------------------------------------------------------------
📌 Feature: Inverter_Iq_Ref_RearLeft_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-7.625, 0.0, 0.0, 51.875]
🎯 Cluster Centers: [0.4015144414907743, 39.82179805002807, 19.093881074086724]
📈 Value Counts: {0: 493474, 1: 36103, 2: 74147}
🔄 Mapping: {0: (-7.625, 0.0), 1: (0.0, 0.0), 2: (0.0, 51.875)}
------------------------------------------------------------
📌 Feature: Inverter_Id_Ref_RearLeft_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-52.9375, -2.0625, 0.0, 0.0, 0.0, 0.0, 0.0]
🎯 Cluster Centers: [-0.05359657265552431, -7.8063245315163, -19.932142857142928, -2.2288847354971297, -4.615025932316691, -32.822265624999964]
📈 Value Counts: {0: 479380, 1: 21719, 2: 980, 3: 52835, 4: 48106, 5: 704}
🔄 Mapping: {0: (-52.9375, -2.0625), 1: (-2.0625, 0.0), 2: (0.0, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------
📌 Feature: MotorTemp_RearLeft_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [4.0, 27.0, 31.0, 38.0, 66.0]
🎯 Cluster Centers: [12.245433175298668, 29.69924260576422, 41.070378301215456, 57.90890176371201]
📈 Value Counts: {0: 76147, 1: 342226, 2: 154110, 3: 31241}
🔄 Mapping: {0: (4.0, 27.0), 1: (27.0, 31.0), 2: (31.0, 38.0), 3: (38.0, 66.0)}
------------------------------------------------------------
📌 Feature: InverterTemp_RearLeft_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 7
📍 Bin Edges: [0.0, 22.0, 24.0, 25.0, 27.0, 29.0, 33.0, 45.0]
🎯 Cluster Centers: [17.068451324289217, 27.91318319685784, 22.124766433674626, 35.749308681884244, 10.768779002317565, 24.868292006721873, 31.239327123657674]
📈 Value Counts: {0: 28166, 1: 116383, 2: 101149, 3: 74857, 4: 27584, 5: 173133, 6: 82452}
🔄 Mapping: {0: (0.0, 22.0), 1: (22.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 27.0), 4: (27.0, 29.0), 5: (29.0, 33.0), 6: (33.0, 45.0)}
------------------------------------------------------------
📌 Feature: InverterSpeed_RearRight_RPM
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-239.0, 0.0, 0.0, 0.0, 2868.0, 4727.0, 11698.0]
🎯 Cluster Centers: [30.720205704699083, 4819.449302477875, 3531.1125359310913, 8502.343852388374, 6177.489420483198, 2079.717369619197]
📈 Value Counts: {0: 360872, 1: 73998, 2: 71836, 3: 11483, 4: 46465, 5: 39070}
🔄 Mapping: {0: (-239.0, 0.0), 1: (0.0, 0.0), 2: (0.0, 0.0), 3: (0.0, 2868.0), 4: (2868.0, 4727.0), 5: (4727.0, 11698.0)}
------------------------------------------------------------
📌 Feature: Inverter_Iq_Ref_RearRight_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 3
📍 Bin Edges: [-50.375, 0.0, 0.0, 7.375]
🎯 Cluster Centers: [-0.4094093342567513, -39.35148427334678, -18.911576733068237]
📈 Value Counts: {0: 493676, 1: 35354, 2: 74694}
🔄 Mapping: {0: (-50.375, 0.0), 1: (0.0, 0.0), 2: (0.0, 7.375)}
------------------------------------------------------------
📌 Feature: Inverter_Id_Ref_RearRight_A
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 6
📍 Bin Edges: [-46.125, -2.0625, 0.0, 0.0, 0.0, 0.0, 0.0]
🎯 Cluster Centers: [-4.588396467617704, -0.054102910237402435, -7.724731182795729, -32.82863729508191, -2.2362862332032822, -18.052535377358538]
📈 Value Counts: {0: 46739, 1: 479102, 2: 20925, 3: 976, 4: 54922, 5: 1060}
🔄 Mapping: {0: (-46.125, -2.0625), 1: (-2.0625, 0.0), 2: (0.0, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------
📌 Feature: MotorTemp_RearRight_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 4
📍 Bin Edges: [4.0, 29.0, 34.0, 42.0, 77.0]
🎯 Cluster Centers: [31.552076309952025, 45.704185796906636, 13.3894029183318, 67.0992184107673]
📈 Value Counts: {0: 332224, 1: 167710, 2: 71548, 3: 32242}
🔄 Mapping: {0: (4.0, 29.0), 1: (29.0, 34.0), 2: (34.0, 42.0), 3: (42.0, 77.0)}
------------------------------------------------------------
📌 Feature: InverterTemp_RearRight_C
------------------------------------------------------------
📊 Method: K-Means Binning
🔢 Number of Bins: 7
📍 Bin Edges: [0.0, 22.0, 24.0, 25.0, 28.0, 30.0, 34.0, 46.0]
🎯 Cluster Centers: [21.877696067917938, 36.47059690994887, 28.14853160375501, 13.040176704302757, 31.41631274235912, 24.92583922101859, 8.526512829121202e-14]
📈 Value Counts: {0: 93284, 1: 88154, 2: 124115, 3: 38256, 4: 92063, 5: 166166, 6: 1686}
🔄 Mapping: {0: (0.0, 22.0), 1: (22.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 28.0), 4: (28.0, 30.0), 5: (30.0, 34.0), 6: (34.0, 46.0)}
------------------------------------------------------------
In [43]:
# Perform discretization on numeric columns using predefined bin configurations
binarization_info, binarized_fault_df = DataDiscretizer.binarize_columns(
df=imola_20250114_df['InverterFault'].to_frame(),
columns=['InverterFault'],
thresholds={'InverterFault': 0.5},
labels={'InverterFault': [0, 1]}
)
# Add the binzarized column to discretized dataframe
kmeans_discretized_imola_20250114_df['InverterFault'] = binarized_fault_df['InverterFault']
# Display summary of binarization results
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
print(f"\n🔹 Column: {column}")
print(f" - Threshold: {info['threshold']}")
print(f" - True (%): {info['percentage_true']}")
print(" - Value Counts:")
print(info['value_counts'].to_string())
📊 Binarization Overview: 🔹 Column: InverterFault - Threshold: 0.5 - True (%): 12.56931975538491 - Value Counts: InverterFault 0 527840 1 75884
In [46]:
# Encode categorical columns using the specified encoding strategy
encoded_imola_20250114_df, _ = DataEncoder.encode_categorical_columns(
df=kmeans_discretized_imola_20250114_df,
encoding_strategy='ordinal'
)
# Visualize the distribution of discrete variables with respect to 'InverterFault'
DataVisualizer.plot_discrete_distributions(
df=encoded_imola_20250114_df,
fault_col='InverterFault'
)
In [47]:
# Define output file path
output_file = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'kmeans', 'discr-20250114-imola.csv')
# Save the discretized dataset
DataProcessor.save_dataset(kmeans_discretized_imola_20250114_df, output_file, file_format="csv")
print(f"✅ Saved discretized (KMeans) data to: {output_file}")
Dataset with shape (603724, 14), saved successfully at .\data\discretized-datasets-imola\kmeans\discr-20250114-imola.csv (csv). ✅ Saved discretized (KMeans) data to: .\data\discretized-datasets-imola\kmeans\discr-20250114-imola.csv